* LAD 

* THIS FILE CLEANS THE LAD DATA, YEAR BY YEAR, AND
* EXPORTS INCOME PERCENTILES BY AGE FOR INDIVIDUALS, PARENTS AND FAMILIES (HOUSEHOLDS)




* ---------------------------------------------------------------------------
* For every LAD year, export income percentiles by single year of age (17-80):
*   1. individual level  -> ind\indpct<year>_age<age>.csv
*   2. parent level      -> parents\parentpct<year>_age<age>.csv
*   3. family level      -> hh\hhpct<year>_maxspouseage<age>.csv
* Each CSV holds the 1st-99th percentiles (pct = 1..99), the number of
* records in the age cell (countobvs) and the age itself.
* ---------------------------------------------------------------------------

* number of quantiles requested from pctile, and the smallest age cell that
* can hold the resulting nq-1 percentile values
local nq 100
local mincell = `nq' - 1

foreach i of numlist 1982/2019{

	cd "G:\LAD_All_years\LAD_Allyear_v6\data_donnees\stata"

	* use TIRC that is CRA income definition and what connolly uses in JOLE
	use FIN* FAMWGT WGT* LIN* TIRC* AGE_* T4E__I* EI__* INDFL* TNKID* SXCO*  using "lad_dal_`i'_f1_v1.dta", clear

	* the individual weight has to be multiplied by 5 (confirmed) and is used
	* as a pweight (confirmed)
	gen newweight=WGT2_I*5

	* working-age sample: older than 16
	keep if AGE__I`i'>16

	* don't want child tax filers
	drop if INDFLI`i'==5 | INDFLI`i'==6

	* drop negative individual income
	drop if TIRC_I`i'<0

	**************** Individuals ****************
	cd "H:\Zheng_10223\Joint\LAD\ind"

	foreach j of numlist 17/80{

		* pctile needs at least nq-1 rows to store its results; skip (and log)
		* cells that are too small instead of aborting the whole run
		quietly count if AGE__I`i'==`j'
		if r(N)<`mincell' {
			display as text "ind: year `i', age `j' has only " r(N) " records - skipped"
			continue
		}

		preserve

		* restrict to the current single year of age
		keep if AGE__I`i'==`j'

		* number of people in this age group
		gen countobvs=_N

		* individual income (TIRC)
		pctile pctindTIRC=TIRC_I`i' [pweight=newweight], nq(`nq')

		* individual employment earnings
		pctile pctindemp=EI___I`i' [pweight=newweight], nq(`nq')

		* individual T4 earnings
		pctile pctindearn=T4E__I`i' [pweight=newweight], nq(`nq')

		* pctile writes its results into the first nq-1 rows: keep only those
		keep pctind* countobvs
		gen pct=_n
		drop if missing(pctindTIRC)

		gen age=`j'

		outsheet using "indpct`i'_age`j'.csv", comma replace
		restore
	}

	**************** Parents ****************
	* keep only those with kids and drop negative family income
	drop if TIRC_F`i'<0
	drop if TNKIDI`i'<1

	* age assigned to the family: own age when INDFLI is 1, 2 or 7 (7 = lone
	* parent), the AGE__P value when INDFLI is 3 or 4; missing otherwise
	gen mainageparent=AGE__I`i' if inlist(INDFLI`i',1,2,7)
	replace mainageparent=AGE__P`i' if inlist(INDFLI`i',3,4)

	* keep one record per family. A stable sort makes the retained record the
	* first one in file order, so results are reproducible across runs
	* (a plain bysort breaks ties arbitrarily).
	sort FIN__I`i', stable
	by FIN__I`i': keep if _n==1

	cd "H:\Zheng_10223\Joint\LAD\parents"

	foreach j of numlist 17/80{

		quietly count if mainageparent==`j'
		if r(N)<`mincell' {
			display as text "parents: year `i', age `j' has only " r(N) " records - skipped"
			continue
		}

		preserve

		* restrict to families whose assigned parent age is the current age
		keep if mainageparent==`j'

		* number of family records in this age group
		gen countobvs=_N

		* parent income (TIRC)
		pctile pctparentTIRC=TIRC_P`i' [pweight=newweight], nq(`nq')

		keep pctparent* countobvs
		gen pct=_n
		drop if missing(pctparentTIRC)

		gen age=`j'

		outsheet using "parentpct`i'_age`j'.csv", comma replace
		restore
	}

	**************** Family ****************
	* The data already hold one record per family and the family age
	* (mainageparent) from the parents step, so neither is rebuilt here.
	* NOTE(review): the sample is still restricted to families with at least
	* one child (TNKIDI>=1) - confirm this is intended for the hh percentiles.
	cd "H:\Zheng_10223\Joint\LAD\hh"

	foreach j of numlist 17/80{

		quietly count if mainageparent==`j'
		if r(N)<`mincell' {
			display as text "hh: year `i', age `j' has only " r(N) " records - skipped"
			continue
		}

		preserve
		keep if mainageparent==`j'

		* number of families in this age group
		gen countobvs=_N

		* family income (TIRC), weighted with the family weight
		pctile pcthhTIRC=TIRC_F`i' [pweight=FAMWGT], nq(`nq')

		drop if missing(pcthhTIRC)

		gen pct=_n

		keep pct* count*

		gen maxage=`j'

		outsheet using "hhpct`i'_maxspouseage`j'.csv", comma replace
		restore
	}

}
